
cd /Users/xiaosongw/Dropbox/Research/InformedSources/Replication/Build

/* ------------------------- VIC ------------------------------ */
* ----- Load Informed Sources VIC Data
import delimited ./Output/is_vic_p_2005_2019.csv, clear
* keep only Melbourne stations
keep if mel == 1
distinct id coor

gen double t = date(date, "YMD")
format t %td
gen manual = .
replace manual = 1 if collectionmethod == "M"
replace manual = 0 if collectionmethod == "E"

gen p = avgprice / 10
drop avgprice mel date collectionmethod
order id t bid
sort id t

* drop stations with only one or two unique prices 
bysort id p: gen _check = _n == 1
by id: egen _np = sum(_check)
drop if _np <= 2
distinct id
sort id t 
drop _*

* generate a balanced panel
tsset id t
tsfill, full
bysort id (t): replace bid = bid[_n-1] if bid == ""
rename bid bn
label define bnlabel ///
    1 "BP"        ///
	2 "Caltex"	///
    3 "Coles"        ///
	4 "Woolworths" ///
    5 "7-Eleven"         ///
	6 "Other"
encode bn, generate(bid) label(bnlabel)	
order id t bid 
drop bn
save Output/is_mel_p_2005_2019.dta, replace

* add costs
use ./Input/tgp_daily.dta, clear
keep if capital == "MELBOURNE"
keep date c
rename date t
merge 1:m t using Output/is_mel_p_2005_2019
keep if _merge == 3
sort id t
order id t
order c, last
drop _*

/****************** Define price jumps by jump magnitude **********************/
/* This method does not perform well when price cycles are changing over time */
by id: gen dp = p - p[_n-1]
gen marg = p - c
* define price jump
gen is_jump = (dp > 3) & (marg > 5)
replace is_jump = . if missing(dp)
* define failed price jump
by id: gen is_jump_fl = (is_jump == 1) & 	///
				 ((marg[_n+1] < 5)|(marg[_n+2] < 5)|(marg[_n+3] < 5)|(marg[_n+4] < 5))
replace is_jump_fl = . if missing(dp)
save Output/is_mel_p_2005_2019, replace

/********************** Define market price cycles ****************************/
use Output/is_mel_p_2005_2019, clear
collapse (mean) pavg = p (sum) n_jump=is_jump n_jump_fl=is_jump_fl, by(t)

* Cycle peak
// ssc install dataex
// ssc install rangestat
rangestat (max) _pmax1 = pavg, interval(t -2 2) 	/* find local maximum */
gen _cyc_peak1 = (pavg == _pmax1)
rangestat (max) _pmax2 = pavg, interval(t -5 5) 	/* find local maximum */
gen _cyc_peak2 = (pavg == _pmax2)

gen cyc_peak = .
replace cyc_peak = 1 if _cyc_peak1 == 1 & t < td(1jan2014)
replace cyc_peak = 1 if _cyc_peak2 == 1 & t >= td(1jan2014)
replace cyc_peak = . if _n <= 5
replace cyc_peak = . if _n > _N-5
replace cyc_peak = 0 if missing(cyc_peak)
* create cycle id based on cycle peak
gen cyc_id = sum(cyc_peak)
bysort cyc_id (t): gen cyc_day = _n
drop _*
save Output/mel_cycle_2005_2019.dta, replace


* merge market-level cycle back into the daily data 
keep t cyc_peak cyc_id cyc_day
merge 1:m t using Output/is_mel_p_2005_2019.dta
sort id t 
order cyc_peak cyc_id cyc_day, last
drop _*
save Output/is_mel_p_2005_2019.dta, replace


/********************** Define station-level price cycles *********************/
use Output/is_mel_p_2005_2019.dta, clear
sort id t 
rangestat (max) _pmax1 = p, interval(t -2 2) by(id)
bysort id (t): replace _pmax1 = . if _n <= 2 | _n >= _N - 2
gen _is_max1 = p == _pmax1 & !missing(p)
bysort id (t): gen _st_resto1 = _is_max1 == 1
sum _st_resto1
di "number of initial restoration="r(sum)
replace _st_resto1 = 0 if dp <= 0 	/* exclude local maximum in constant price */
rangestat (min) _pmin = p, interval(t -3 0) by(id)
gen _dp = p - _pmin
replace _st_resto1 = 0 if _dp < 5	/* exclude local maximum near the bottom of a cycle */
sum _st_resto1
di "number of restoration in melbourne="r(sum) 

rangestat (max) _pmax2 = p, interval(t -5 5) by(id)
bysort id (t): replace _pmax2 = . if _n <= 2 | _n >= _N - 2
gen _is_max2 = p == _pmax2 & !missing(p)
bysort id (t): gen _st_resto2 = _is_max2 == 1
sum _st_resto2
di "number of initial restoration="r(sum)
replace _st_resto2 = 0 if dp <= 0 	/* exclude local maximum in constant price */
replace _st_resto2 = 0 if _dp < 7	/* exclude local maximum near the bottom of a cycle */
sum _st_resto2
di "number of restoration in melbourne="r(sum) 

gen st_resto = 0
replace st_resto = 1 if _st_resto1 == 1 & t < td(1jan2014)
replace st_resto = 1 if _st_resto2 == 1 & t >= td(1jan2014)
drop _*
bysort id (t): gen st_cyc_id = sum(st_resto) 
bysort id st_cyc_id (t): gen st_cyc_day = _n - 1 
by id st_cyc_id: egen st_cyc_len = max(st_cyc_day)
replace st_cyc_len = st_cyc_len + 1

* if a station price cycle has many missing values, then drop this cycle
egen _nmiss = total(missing(p)), by(id st_cyc_id)
replace st_cyc_id = . if _nmiss > st_cyc_len / 2 | _nmiss > 30
replace st_cyc_day = . if st_cyc_id == .
replace st_cyc_len = . if st_cyc_id == .
drop _*


gen st_cyc_day_norm = 0 if st_cyc_day == 0
forval i = 1/10	{
	replace st_cyc_day_norm = `i' if (st_cyc_day >= ((st_cyc_len - 1) * (`i' - 1) / 10 + 1)) & 	///
								   (st_cyc_day <= ((st_cyc_len - 1) * `i' / 10 + 1))
}
replace st_cyc_day_norm = . if missing(st_cyc_id)
replace st_cyc_day_norm = . if st_cyc_len <= 10
// * drop last cycle day 
// replace st_cyc_day_norm = . if st_cyc_day == st_cyc_len - 1

save Output/is_mel_p_2005_2019.dta, replace



/************************ Define Brand Cycle *********************************/
forval ib = 1/5 {
	
	use Output/is_mel_p_2005_2019.dta, clear
	collapse (mean) pavg = p (sum) n_jump=is_jump n_jump_fl=is_jump_fl if bid==`ib', by(t)

		if (`ib'==1) {
			local retailer = "bp"
		}
		if(`ib'==2){
			local retailer="cal"
		}
		if(`ib'==3){
			local retailer="coles"
		}
		if(`ib'==4){
			local retailer="wool"
		}
		if(`ib'==5){
			local retailer="sev"
		}
	* fill days when no prices were updated
	rename pavg _pavg
	ipolate _pavg t, gen(pavg)
	replace pavg = round(pavg, .1)

	* Cycle peak
	rangestat (max) _pmax1 = pavg, interval(t -2 2) 	/* find local maximum */
	rangestat (min) _pmin1 = pavg, interval(t -3 0) 	/* find min price in past three days */
	gen _cyc_peak1 = (pavg == _pmax1) & (pavg - _pmin1 > 2)
	rangestat (max) _pmax2 = pavg, interval(t -5 5) 	/* find local maximum */
	rangestat (min) _pmin2 = pavg, interval(t -5 0) 	/* find min price in past four days */
	gen _cyc_peak2 = (pavg == _pmax2) & (pavg - _pmin2 > 2)

	gen  `retailer'_cyc_peak = .
	replace `retailer'_cyc_peak = 1 if _cyc_peak1 == 1 & pavg > pavg[_n-1] & t < td(1jan2014) 
	replace `retailer'_cyc_peak = 1 if _cyc_peak2 == 1 & pavg > pavg[_n-1] & t >= td(1jan2014)
	replace `retailer'_cyc_peak = . if _n <= 5
	replace `retailer'_cyc_peak = . if _n > _N-5
	replace `retailer'_cyc_peak = 0 if missing(`retailer'_cyc_peak)
	* create cycle id based on cycle peak
	gen `retailer'_cyc_id = sum(`retailer'_cyc_peak)
	bysort `retailer'_cyc_id (t): gen `retailer'_cyc_day = _n
	bysort `retailer'_cyc_id (t): egen `retailer'_cyc_len = count(t)
	drop _*

	* merge market-level cycle back into the daily data 
	keep t `retailer'_cyc_peak `retailer'_cyc_id `retailer'_cyc_day `retailer'_cyc_len
	merge 1:m t using Output/is_mel_p_2005_2019.dta
	sort id t 
	order `retailer'_cyc_peak `retailer'_cyc_id `retailer'_cyc_day `retailer'_cyc_len, last
	drop _*

	save Output/is_mel_p_2005_2019.dta, replace
	
}






